#!/usr/bin/env python
# encoding=utf8

## Database: status error codes
#  0 : not parsed
#  1 : parsed correctly
#  2 : page not found
#  3 : attempted but something went wrong

# E-mail settings the bot uses to report on its progress
##
# me  == sender's e-mail address (also the account used to log in to the SMTP server)
# you == recipient's e-mail address
# pwd == password of the sender's account
me = ""   # REDACTED
you = ""  # REDACTED
pwd = ""  # REDACTED
# Number of scraped pages between two e-mail reports
mail_msg_threshold = 100
# All of the above bundled in a single dictionary
mail_spec = dict(me=me, you=you, pwd=pwd, msg_threshold=mail_msg_threshold)
##

import os
import sqlite3
import re
import json
from random import randint
from time import sleep
import datetime
import sys
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

import html2text
from smtplib import SMTP_SSL as SMTP
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def queryDb(db):
    """Return the fb_id of every row of fbUsersTimeline that is still unparsed.

    Only rows with ``status = 0`` are selected. Each id is returned UTF-8
    encoded, as in the original implementation.

    Args:
        db: path of the SQLite database file.

    Returns:
        A list of UTF-8 encoded fb_id values; an empty list when the
        database cannot be opened or queried (the error is printed).
    """
    conn = None
    try:
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        cursor.execute("SELECT fb_id FROM fbUsersTimeline WHERE status = 0")
        rows = cursor.fetchall()
    except sqlite3.Error as e:
        # Previously a failed connect left `cursor` unbound and the function
        # crashed with a NameError right after printing the real error.
        print(str(e))
        return []
    finally:
        # Always release the connection, even when the query failed.
        if conn is not None:
            conn.close()

    return [fb_id.encode("utf-8") for (fb_id,) in rows]
    

def enterData(db, fb_id, min_year, list_years, status):
    """Store the scraping result of one user in fbUsersTimeline.

    Args:
        db: path of the SQLite database file.
        fb_id: identifier of the row to update.
        min_year: value written to the ``joined`` column (may be None).
        list_years: value written to ``active_years`` after JSON encoding
            (None is stored as the JSON text ``null``).
        status: status code written to the ``status`` column.

    Returns:
        None. Failures are printed and swallowed so that one bad row does
        not stop the caller.
    """
    conn = None
    try:
        conn = sqlite3.connect(db)
        conn.execute(
            "UPDATE fbUsersTimeline SET joined = ?, active_years = ?, status = ? WHERE fb_id = ?;",
            (min_year, json.dumps(list_years), status, fb_id))
        conn.commit()
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print(str(e))
    finally:
        # The connection used to be left open, leaking one handle per call.
        if conn is not None:
            conn.close()

def composeUrl(fb_id):
    """Return the URL to visit for the given fb_id.

    A value that already contains '://www.facebook.com/' is returned
    unchanged; anything else is appended to 'https://www.facebook.com/'.
    """
    if '://www.facebook.com/' in fb_id:
        return fb_id
    return 'https://www.facebook.com/' + fb_id

    
def parseYears(unicode):
    """Extract the years listed in a timeline summary.

    The text is split on newlines and every line made only of ASCII digits
    is converted to an int, e.g. 'Recent\\n2014\\n2013\\nBorn' -> [2014, 2013].

    Args:
        unicode: the summary text (the parameter name is kept for backward
            compatibility even though it shadows the Python 2 builtin).
            Byte strings are decoded as UTF-8 first.

    Returns:
        The list of years in order of appearance, or None when no year is
        found or the input is not a string.
    """
    text = unicode
    if isinstance(text, bytes):
        text = text.decode("utf-8", "replace")

    try:
        lines = text.split(u'\n')
    except (AttributeError, TypeError):
        # Not a string (e.g. None): nothing to parse.
        return None

    # ASCII digits only, so every accepted line is guaranteed to convert with int().
    years = [int(line) for line in lines if re.match(r'[0-9]+\Z', line)]
    return years or None

def composeMailMessage(error):
    """Return the HTML body of the bot's final report.

    Takes a single argument: when `error` compares equal to True the
    failure message is returned, otherwise the success message.
    """
    failure_html = "<p>Hi!<br>Here the bot scraping Facebook users timelines.\n<br>Something went worng and I quit the job.<br>Have a nice day</p>"
    success_html = "<p>Hi!<br>Here the bot scraping Facebook users timelines.\n<br>Mission accomplished.<br>Have a nice day.</p>"

    # The explicit comparison is intentional: only values equal to True
    # (True, 1, 1.0) select the failure text.
    return failure_html if error == True else success_html


def sendMail(me, you, pwd, html):
    """Send an HTML e-mail (with a plain-text alternative) through Gmail.

    Args:
        me: sender's address, also used as the SMTP login.
        you: recipient's address.
        pwd: password for the SMTP login.
        html: HTML fragment used as the message body.

    Returns:
        None. Any failure is printed and swallowed: a notification problem
        must not abort the caller.
    """
    # The module only imports `SMTP_SSL as SMTP` from smtplib, so the name
    # `smtplib` used below was undefined and every send failed with a
    # NameError. Import it locally to keep this function self-contained.
    import smtplib

    try:
        # Create message container - the correct MIME type is multipart/alternative.
        msg = MIMEMultipart('alternative')
        msg['Subject'] = "Message from your bot scraping Facebook users timelines"
        msg['From'] = me
        msg['To'] = you

        # Create the body of the message (a plain-text and an HTML version).
        text = html2text.html2text(html)
        html = """\
        <html>
        <head></head>
        <body>
        %s
        </body>
        </html>
        """ % (html)

        # Record the MIME types of both parts - text/plain and text/html.
        part1 = MIMEText(text, 'plain')
        part2 = MIMEText(html, 'html')

        # Attach parts into message container.
        # According to RFC 2046, the last part of a multipart message, in this case
        # the HTML message, is best and preferred.
        msg.attach(part1)
        msg.attach(part2)

        # Send the message through Gmail's SMTP server, upgrading the
        # connection with STARTTLS before logging in.
        mailServer = smtplib.SMTP("smtp.gmail.com", 587)
        try:
            mailServer.ehlo()
            mailServer.starttls()
            mailServer.ehlo()
            mailServer.login(me, pwd)
            # sendmail function takes 3 arguments: sender's address, recipient's address
            # and message to send - here it is sent as one string.
            mailServer.sendmail(me, you, msg.as_string())
        finally:
            # Release the socket even when login or sending fails.
            mailServer.close()

    except Exception as e:
        print(str(e))

    return

def _recordYears(db, fb_id, active_years):
    """Store parsed years for fb_id: status 1 with data, status 3 when nothing was parsed."""
    if active_years is not None:
        enterData(db, fb_id, min(active_years), active_years, 1)
    else:
        enterData(db, fb_id, None, None, 3)


def main():
    """Scrape the timeline years of every unparsed user and store them.

    For each fb_id returned by queryDb the profile page is opened in Chrome.
    The years are read from the "fbTimelineScrubber" element or, when that
    element does not show up, from a "PageScrubberPagelet_*" div of the page
    source. The outcome is written with enterData using the status codes
    listed at the top of the file (1 parsed, 2 page not found, 3 failed).

    When a page matches none of the known layouts an error e-mail is sent
    and the script exits; otherwise a success e-mail is sent at the end.
    """
    db = 'fb_users_timeline.sqlite'
    chromedriver = "chromedriver"
    os.environ["webdriver.chrome.driver"] = chromedriver

    query_list = queryDb(db)

    # Compiled once instead of on every fallback.
    not_found = re.compile(r'Page not found')
    scrubber_id = re.compile('^PageScrubberPagelet_')

    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(60)

        for fb_id in query_list:

            print(fb_id)
            driver.get(composeUrl(fb_id))

            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "fbTimelineScrubber")))

            except Exception:
                # `except Exception` rather than a bare except, so that
                # Ctrl-C / SystemExit are no longer mistaken for a timeout.
                soup = BeautifulSoup(driver.page_source)

                # Pages without <head>/<title> used to crash with AttributeError.
                title = soup.head.title if soup.head is not None else None
                title_text = title.text if title is not None else u""

                if not_found.search(title_text) is not None:
                    print("Page not found")
                    enterData(db, fb_id, None, None, 2)
                else:
                    timeline_summary = soup.find('div', id=scrubber_id)
                    if timeline_summary is None:
                        # Unknown page layout: report and stop. The driver
                        # is closed by the `finally` clause below.
                        mail_message = composeMailMessage(error=True)
                        sendMail(me, you, pwd, mail_message)
                        sys.exit()

                    # One text node per line, the format parseYears expects.
                    items = u"".join(
                        node + u"\n" for node in timeline_summary.findAll(text=True))
                    # The years must be parsed BEFORE being tested; the old
                    # code tested `active_years` before assigning it.
                    _recordYears(db, fb_id, parseYears(items))

            else:
                try:
                    timeline_summary = driver.find_element_by_class_name("fbTimelineScrubber")
                except NoSuchElementException as e:
                    print(str(e))
                else:
                    _recordYears(db, fb_id, parseYears(timeline_summary.text))

            # Random pause between two profiles, whatever the outcome.
            sleep(randint(10, 40))

        mail_message = composeMailMessage(error=False)
        sendMail(me, you, pwd, mail_message)

    finally:
        # Close the browser on every exit path, including unexpected errors.
        driver.quit()
        
# Script entry point: the scraper starts as soon as this module is executed.
# NOTE: there is no `if __name__ == "__main__"` guard, so importing the
# module runs the scraper as well.
main()
